In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2
import sys
import collections
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import tqdm
import patsy
import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy.stats

sys.path.append('../src/')
import data_processing
import ripple_decoding
import ripple_detection

Animal = collections.namedtuple('Animal', {'directory', 'short_name'})
animals = {'HPa': Animal(directory='HPa_direct', short_name='HPa')}
epoch_index = ('HPa', 1, 4)
sampling_frequency = 1500

In [2]:
print('\nDecoding ripples for Animal {0}, Day {1}, Epoch #{2}:'.format(*epoch_index))
# Include only CA1 neurons with spikes
neuron_info = data_processing.make_neuron_dataframe(animals)[epoch_index].dropna()
tetrode_info = data_processing.make_tetrode_dataframe(animals)[epoch_index]
neuron_info = pd.merge(tetrode_info, neuron_info,
                       on=['animal', 'day', 'epoch_ind', 'tetrode_number', 'area'],
                       how='right', right_index=True).set_index(neuron_info.index)
neuron_info = neuron_info[neuron_info.area.isin(['CA1', 'iCA1']) &
                          (neuron_info.numspikes > 0) &
                          ~neuron_info.descrip.str.endswith('Ref').fillna(False)]
print(neuron_info)
# Train on when the rat is moving
position_info = data_processing.get_interpolated_position_dataframe(
    epoch_index, animals)
spikes_data = [data_processing.get_spike_indicator_dataframe(neuron_index, animals)
               for neuron_index in neuron_info.index]

# Make sure there are spikes in the training data times. Otherwise exclude that neuron
MEAN_RATE_THRESHOLD = 0.10  # in Hz
spikes_data = [spikes_datum for spikes_datum in spikes_data
               if spikes_datum[position_info.speed > 4].is_spike.mean() *
               sampling_frequency > MEAN_RATE_THRESHOLD]

train_position_info = position_info.query('speed > 4')
train_spikes_data = [spikes_datum[position_info.speed > 4]
                     for spikes_datum in spikes_data]


Decoding ripples for Animal HPa, Day 1, Epoch #4:
                                                   area  depth descrip  \
animal day epoch_ind tetrode_number neuron_number                        
HPa    1   4         1              1               CA1    114  riptet   
                                    2               CA1    114  riptet   
                                    3               CA1    114  riptet   
                                    4               CA1    114  riptet   
                                    5               CA1    114  riptet   
                                    6               CA1    114  riptet   
                     4              1               CA1    117  riptet   
                                    2               CA1    117  riptet   
                                    3               CA1    117  riptet   
                                    4               CA1    117  riptet   
                                    5               CA1    117  riptet   
                                    6               CA1    117  riptet   
                                    7               CA1    117  riptet   
                                    9               CA1    117  riptet   
                                    10              CA1    117  riptet   
                                    11              CA1    117  riptet   
                                    12              CA1    117  riptet   
                                    13              CA1    117  riptet   
                     8              1              iCA1    116  riptet   
                     9              1              iCA1    100  riptet   
                     14             1              iCA1    106  riptet   

                                                   numcells animal  day  \
animal day epoch_ind tetrode_number neuron_number                         
HPa    1   4         1              1                     7    HPa    1   
                                    2                     7    HPa    1   
                                    3                     7    HPa    1   
                                    4                     7    HPa    1   
                                    5                     7    HPa    1   
                                    6                     7    HPa    1   
                     4              1                    13    HPa    1   
                                    2                    13    HPa    1   
                                    3                    13    HPa    1   
                                    4                    13    HPa    1   
                                    5                    13    HPa    1   
                                    6                    13    HPa    1   
                                    7                    13    HPa    1   
                                    9                    13    HPa    1   
                                    10                   13    HPa    1   
                                    11                   13    HPa    1   
                                    12                   13    HPa    1   
                                    13                   13    HPa    1   
                     8              1                     1    HPa    1   
                     9              1                     1    HPa    1   
                     14             1                     1    HPa    1   

                                                   epoch_ind  tetrode_number  \
animal day epoch_ind tetrode_number neuron_number                              
HPa    1   4         1              1                      4               1   
                                    2                      4               1   
                                    3                      4               1   
                                    4                      4               1   
                                    5                      4               1   
                                    6                      4               1   
                     4              1                      4               4   
                                    2                      4               4   
                                    3                      4               4   
                                    4                      4               4   
                                    5                      4               4   
                                    6                      4               4   
                                    7                      4               4   
                                    9                      4               4   
                                    10                     4               4   
                                    11                     4               4   
                                    12                     4               4   
                                    13                     4               4   
                     8              1                      4               8   
                     9              1                      4               9   
                     14             1                      4              14   

                                                  tetrode_id  \
animal day epoch_ind tetrode_number neuron_number              
HPa    1   4         1              1                 HPa141   
                                    2                 HPa141   
                                    3                 HPa141   
                                    4                 HPa141   
                                    5                 HPa141   
                                    6                 HPa141   
                     4              1                 HPa144   
                                    2                 HPa144   
                                    3                 HPa144   
                                    4                 HPa144   
                                    5                 HPa144   
                                    6                 HPa144   
                                    7                 HPa144   
                                    9                 HPa144   
                                    10                HPa144   
                                    11                HPa144   
                                    12                HPa144   
                                    13                HPa144   
                     8              1                 HPa148   
                     9              1                 HPa149   
                     14             1                HPa1414   

                                                                    csi  \
animal day epoch_ind tetrode_number neuron_number                         
HPa    1   4         1              1               0.15163699023549684   
                                    2                  0.15368081676518   
                                    3               0.06485355648535565   
                                    4               0.12368421052631579   
                                    5               0.07959949937421777   
                                    6               0.08576642335766424   
                     4              1               0.13953488372093023   
                                    2              0.029411764705882353   
                                    3                0.1457641196013289   
                                    4               0.08823529411764706   
                                    5                0.1522468142186452   
                                    6               0.11607142857142858   
                                    7               0.07142857142857142   
                                    9                0.1679902260232132   
                                    10              0.10616438356164383   
                                    11              0.06056701030927835   
                                    12              0.09714285714285714   
                                    13              0.11013215859030837   
                     8              1               0.14345991561181434   
                     9              1               0.10364145658263306   
                     14             1               0.07974423935335988   

                                                               meanrate  \
animal day epoch_ind tetrode_number neuron_number                         
HPa    1   4         1              1                1.4317434210526316   
                                    2                3.0608552631578947   
                                    3                0.7861842105263158   
                                    4                            0.3125   
                                    5                3.2853618421052633   
                                    6                0.4506578947368421   
                     4              1                0.6011513157894737   
                                    2                0.3355263157894737   
                                    3                 1.980263157894737   
                                    4              0.027960526315789474   
                                    5                1.2261513157894737   
                                    6                0.7368421052631579   
                                    7               0.20723684210526316   
                                    9                 1.346217105263158   
                                    10               0.7203947368421053   
                                    11               0.6381578947368421   
                                    12              0.14391447368421054   
                                    13              0.18667763157894737   
                     8              1               0.38980263157894735   
                     9              1                 0.587171052631579   
                     14             1                 6.816611842105263   

                                                   neuron_number numspikes  \
animal day epoch_ind tetrode_number neuron_number                            
HPa    1   4         1              1                          1      1741   
                                    2                          2      3722   
                                    3                          3       956   
                                    4                          4       380   
                                    5                          5      3995   
                                    6                          6       548   
                     4              1                          1       731   
                                    2                          2       408   
                                    3                          3      2408   
                                    4                          4        34   
                                    5                          5      1491   
                                    6                          6       896   
                                    7                          7       252   
                                    9                          9      1637   
                                    10                        10       876   
                                    11                        11       776   
                                    12                        12       175   
                                    13                        13       227   
                     8              1                          1       474   
                     9              1                          1       714   
                     14             1                          1      8289   

                                                            propbursts  \
animal day epoch_ind tetrode_number neuron_number                        
HPa    1   4         1              1              0.48477886272257326   
                                    2               0.5354648038688877   
                                    3               0.2583682008368201   
                                    4               0.4131578947368421   
                                    5              0.30037546933667086   
                                    6               0.2956204379562044   
                     4              1               0.4746922024623803   
                                    2              0.12009803921568628   
                                    3               0.4962624584717608   
                                    4               0.3235294117647059   
                                    5               0.4936284372904091   
                                    6              0.40513392857142855   
                                    7              0.25396825396825395   
                                    9                0.548564447159438   
                                    10              0.3778538812785388   
                                    11             0.23324742268041238   
                                    12              0.3028571428571429   
                                    13              0.3303964757709251   
                     8              1               0.5189873417721519   
                     9              1               0.3641456582633053   
                     14             1               0.2884545783568585   

                                                           spikewidth  \
animal day epoch_ind tetrode_number neuron_number                       
HPa    1   4         1              1               8.404000423452445   
                                    2              10.068207612419641   
                                    3              10.591460255431754   
                                    4                9.56360765114096   
                                    5              11.086349081099883   
                                    6               8.898837340698611   
                     4              1               8.928423068731131   
                                    2               9.606738087324494   
                                    3               8.824516757593013   
                                    4               8.614027432575318   
                                    5               8.542827069173516   
                                    6               8.835866239911471   
                                    7                8.14703602100615   
                                    9               7.677985200040181   
                                    10              9.274112719181899   
                                    11              9.412296547260718   
                                    12              7.361349894539315   
                                    13              8.306329106015445   
                     8              1                9.54469181741173   
                     9              1              10.236683991169903   
                     14             1              11.229290642757745   

                                                  neuron_id  
animal day epoch_ind tetrode_number neuron_number            
HPa    1   4         1              1               HPa1411  
                                    2               HPa1412  
                                    3               HPa1413  
                                    4               HPa1414  
                                    5               HPa1415  
                                    6               HPa1416  
                     4              1               HPa1441  
                                    2               HPa1442  
                                    3               HPa1443  
                                    4               HPa1444  
                                    5               HPa1445  
                                    6               HPa1446  
                                    7               HPa1447  
                                    9               HPa1449  
                                    10             HPa14410  
                                    11             HPa14411  
                                    12             HPa14412  
                                    13             HPa14413  
                     8              1               HPa1481  
                     9              1               HPa1491  
                     14             1              HPa14141  

In [3]:
formula = '1 + trajectory_direction * bs(linear_distance, df=10, degree=3)'
design_matrix = patsy.dmatrix(
    formula, train_position_info, return_type='dataframe')

def glmfit(spikes, design_matrix, ind):
    print(ind)
    return sm.GLM(spikes, design_matrix, family=sm.families.Poisson(),
                  drop='missing').fit(maxiter=30)

fit = [glmfit(spikes, design_matrix, ind)
       for ind, spikes in enumerate(train_spikes_data)]


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/families/links.py:422: RuntimeWarning: overflow encountered in exp
  return np.exp(z)
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/families/family.py:347: RuntimeWarning: divide by zero encountered in true_divide
  endog_mu = self._clean(endog/mu)
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/families/family.py:347: RuntimeWarning: invalid value encountered in true_divide
  endog_mu = self._clean(endog/mu)
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/families/family.py:121: RuntimeWarning: invalid value encountered in multiply
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/families/family.py:121: RuntimeWarning: divide by zero encountered in true_divide
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/generalized_linear_model.py:711: RuntimeWarning: invalid value encountered in multiply
  - offset_exposure)
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/regression/linear_model.py:612: RuntimeWarning: invalid value encountered in multiply
  return np.sqrt(self.weights)[:, None]*X
---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-3-eed92a39d4ba> in <module>()
      9 
     10 fit = [wtf(spikes, design_matrix, ind)
---> 11        for ind, spikes in enumerate(train_spikes_data)]

<ipython-input-3-eed92a39d4ba> in <listcomp>(.0)
      9 
     10 fit = [wtf(spikes, design_matrix, ind)
---> 11        for ind, spikes in enumerate(train_spikes_data)]

<ipython-input-3-eed92a39d4ba> in wtf(spikes, design_matrix, ind)
      6     print(ind)
      7     return sm.GLM(spikes, design_matrix, family=sm.families.Poisson(),
----> 8                   drop='missing').fit(maxiter=30)
      9 
     10 fit = [wtf(spikes, design_matrix, ind)

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/generalized_linear_model.py in fit(self, start_params, maxiter, method, tol, scale, cov_type, cov_kwds, use_t, **kwargs)
    710             wlsendog = (lin_pred + self.family.link.deriv(mu) * (self.endog-mu)
    711                         - offset_exposure)
--> 712             wls_results = lm.WLS(wlsendog, wlsexog, self.weights).fit()
    713             lin_pred = np.dot(self.exog, wls_results.params) + offset_exposure
    714             mu = self.family.fitted(lin_pred)

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/regression/linear_model.py in fit(self, method, cov_type, cov_kwds, use_t, **kwargs)
    172                 (not hasattr(self, 'rank'))):
    173 
--> 174                 self.pinv_wexog, singular_values = pinv_extended(self.wexog)
    175                 self.normalized_cov_params = np.dot(self.pinv_wexog,
    176                                         np.transpose(self.pinv_wexog))

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/tools/tools.py in pinv_extended(X, rcond)
    390     X = np.asarray(X)
    391     X = X.conjugate()
--> 392     u, s, vt = np.linalg.svd(X, 0)
    393     s_orig = np.copy(s)
    394     m = u.shape[0]

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/numpy/linalg/linalg.py in svd(a, full_matrices, compute_uv)
   1357 
   1358         signature = 'D->DdD' if isComplexType(t) else 'd->ddd'
-> 1359         u, s, vt = gufunc(a, signature=signature, extobj=extobj)
   1360         u = u.astype(result_t, copy=False)
   1361         s = s.astype(_realType(result_t), copy=False)

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/numpy/linalg/linalg.py in _raise_linalgerror_svd_nonconvergence(err, flag)
     97 
     98 def _raise_linalgerror_svd_nonconvergence(err, flag):
---> 99     raise LinAlgError("SVD did not converge")
    100 
    101 def get_linalg_error_extobj(callback):

LinAlgError: SVD did not converge

In [20]:
bad_ind = 15

In [22]:
train_spikes_data[bad_ind].sum()


Out[22]:
is_spike    116
dtype: int64

In [28]:
train_spikes_data[bad_ind].mean() * sampling_frequency


Out[28]:
is_spike    0.194023
dtype: float64

In [23]:
train_spikes_data[bad_ind].isnull().sum()


Out[23]:
is_spike    0
dtype: int64

In [26]:
def occupancy_normalized_histogram(stat_at_spike, stat, bins, sampling_frequency, ax=None, **kwargs):
    if ax is None:
        ax = plt.gca()
    occupancy, _ = np.histogram(stat, bins=bins)
    binned_stat_at_spike, _ = np.histogram(stat_at_spike, bins=bins)
    width = bins[1] - bins[0]
    ax.bar(bins[:-1], sampling_frequency * binned_stat_at_spike / occupancy, width=width, **kwargs)
    ax.set_xlim((bins.min(), bins.max()))
    
def onh_wrap(distance, **kwargs):
    dataframe = kwargs.pop('data')
    num_bins = kwargs.pop('num_bins')
    sampling_frequency = kwargs.pop('sampling_frequency')
    linear_distance_grid = np.linspace(np.floor(position_info[distance].min()),
                                   np.ceil(position_info[distance].max()),
                                   num_bins)
    occupancy_normalized_histogram(
        dataframe.query('is_spike > 0')[distance],
        dataframe[distance],
        linear_distance_grid, sampling_frequency, **kwargs)

def plot_hist_by_factors(spikes, position_info, covariate, distance, sampling_frequency=1500, num_bins=49):
    grid = sns.FacetGrid(pd.concat([spikes, position_info], axis=1),
                         col=covariate, hue=covariate, col_wrap=5)
    grid.map_dataframe(onh_wrap, distance, sampling_frequency=sampling_frequency, num_bins=num_bins)

In [143]:
plot_hist_by_factors(train_spikes_data[bad_ind], train_position_info,
                     'trajectory_direction', 'linear_distance', num_bins=49)


/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/ipykernel/__main__.py:7: RuntimeWarning: invalid value encountered in true_divide

In [145]:
plot_hist_by_factors(train_spikes_data[bad_ind], train_position_info,
                     'trajectory_category_ind', 'linear_distance', num_bins=49)


/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/ipykernel/__main__.py:7: RuntimeWarning: invalid value encountered in true_divide

In [210]:
formula = '1 + bs(linear_distance, df=10, degree=3)'
design_matrix = patsy.dmatrix(
    formula, train_position_info, return_type='dataframe')
fitted_model = sm.GLM(train_spikes_data[bad_ind], design_matrix, family=sm.families.Poisson()).fit(maxiter=30, drop='missing')

def _get_grid_centers(grid):
    return grid[:-1] + np.diff(grid) / 2

linear_distance_grid_num_bins = 49
linear_distance_grid = np.linspace(np.floor(position_info.linear_distance.min()),
                                   np.ceil(position_info.linear_distance.max()),
                                   linear_distance_grid_num_bins)
linear_distance_grid_centers = _get_grid_centers(linear_distance_grid)
predictors = {'linear_distance': linear_distance_grid_centers}
predict_design_matrix = patsy.build_design_matrices([design_matrix.design_info], predictors)[0]
plt.plot(linear_distance_grid_centers, fitted_model.predict(predict_design_matrix) * sampling_frequency)
plt.xlim((linear_distance_grid.min(), linear_distance_grid.max()));

print(fitted_model.summary())


                 Generalized Linear Model Regression Results                  
==============================================================================
Dep. Variable:               is_spike   No. Observations:               896799
Model:                            GLM   Df Residuals:                   896788
Model Family:                 Poisson   Df Model:                           10
Link Function:                    log   Scale:                             1.0
Method:                          IRLS   Log-Likelihood:                -975.45
Date:                Fri, 18 Nov 2016   Deviance:                       1718.9
Time:                        15:32:57   Pearson chi2:                 6.46e+05
No. Iterations:                    30                                         
===========================================================================================================
                                              coef    std err          z      P>|z|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------------------------------
Intercept                                 -59.7761     30.329     -1.971      0.049      -119.219    -0.333
bs(linear_distance, df=10, degree=3)[0]    82.8442     30.329      2.732      0.006        23.401   142.287
bs(linear_distance, df=10, degree=3)[1]   -96.5851     30.329     -3.185      0.001      -156.028   -37.142
bs(linear_distance, df=10, degree=3)[2]    43.7285     30.329      1.442      0.149       -15.714   103.171
bs(linear_distance, df=10, degree=3)[3]    48.7238     30.329      1.607      0.108       -10.719   108.167
bs(linear_distance, df=10, degree=3)[4]    49.7769     30.340      1.641      0.101        -9.689   109.243
bs(linear_distance, df=10, degree=3)[5]    47.5414     30.338      1.567      0.117       -11.920   107.003
bs(linear_distance, df=10, degree=3)[6]    55.6441     30.334      1.834      0.067        -3.809   115.097
bs(linear_distance, df=10, degree=3)[7]    49.5262     30.345      1.632      0.103        -9.949   109.001
bs(linear_distance, df=10, degree=3)[8]    49.0208     30.398      1.613      0.107       -10.558   108.600
bs(linear_distance, df=10, degree=3)[9]    45.9183     30.739      1.494      0.135       -14.330   106.167
===========================================================================================================

In [211]:
formula = '1 + trajectory_direction'
design_matrix = patsy.dmatrix(
    formula, train_position_info, return_type='dataframe')
fitted_model = sm.GLM(train_spikes_data[bad_ind], design_matrix, family=sm.families.Poisson()).fit(maxiter=30, drop='missing')

predictors = {'trajectory_direction': ['Inbound', 'Outbound']}
predict_design_matrix = patsy.build_design_matrices([design_matrix.design_info], predictors)[0]
inbound = plt.bar(0,  fitted_model.predict(predict_design_matrix)[0] * sampling_frequency)
outbound = plt.bar(1,  fitted_model.predict(predict_design_matrix)[1] * sampling_frequency)
plt.xticks([0.5, 1.5], ['Inbound', 'Outbound']);
print(fitted_model.summary())


                 Generalized Linear Model Regression Results                  
==============================================================================
Dep. Variable:               is_spike   No. Observations:               896799
Model:                            GLM   Df Residuals:                   896797
Model Family:                 Poisson   Df Model:                            1
Link Function:                    log   Scale:                             1.0
Method:                          IRLS   Log-Likelihood:                -1153.6
Date:                Fri, 18 Nov 2016   Deviance:                       2075.1
Time:                        15:33:40   Pearson chi2:                 8.97e+05
No. Iterations:                    13                                         
====================================================================================================
                                       coef    std err          z      P>|z|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------------------------
Intercept                           -8.8168      0.130    -67.723      0.000        -9.072    -8.562
trajectory_direction[T.Outbound]    -0.2599      0.186     -1.400      0.162        -0.624     0.104
====================================================================================================

In [212]:
formula = '1 + bs(linear_distance, df=10, degree=3) + trajectory_direction'
design_matrix = patsy.dmatrix(
    formula, train_position_info, return_type='dataframe')
fitted_model = sm.GLM(train_spikes_data[bad_ind], design_matrix, family=sm.families.Poisson()).fit(maxiter=30, drop='missing')

def _get_grid_centers(grid):
    return grid[:-1] + np.diff(grid) / 2

linear_distance_grid_num_bins = 49
linear_distance_grid = np.linspace(np.floor(position_info.linear_distance.min()),
                                   np.ceil(position_info.linear_distance.max()),
                                   linear_distance_grid_num_bins)
linear_distance_grid_centers = _get_grid_centers(linear_distance_grid)

fig, ax = plt.subplots(1, 2, sharex=True, sharey=True)

predictors = {'linear_distance': linear_distance_grid_centers, 
              'trajectory_direction': ['Inbound'] *
                  len(linear_distance_grid_centers)}
predict_design_matrix = patsy.build_design_matrices([design_matrix.design_info], predictors)[0]
ax[0].plot(linear_distance_grid_centers, fitted_model.predict(predict_design_matrix) * sampling_frequency)
ax[0].set_xlim((linear_distance_grid.min(), linear_distance_grid.max()));
ax[0].set_title('Inbound')

predictors = {'linear_distance': linear_distance_grid_centers, 
              'trajectory_direction': ['Outbound'] *
                  len(linear_distance_grid_centers)}
predict_design_matrix = patsy.build_design_matrices([design_matrix.design_info], predictors)[0]
ax[1].plot(linear_distance_grid_centers, fitted_model.predict(predict_design_matrix) * sampling_frequency)
ax[1].set_xlim((linear_distance_grid.min(), linear_distance_grid.max()));
ax[1].set_title('Outbound')
print(fitted_model.summary())


                 Generalized Linear Model Regression Results                  
==============================================================================
Dep. Variable:               is_spike   No. Observations:               896799
Model:                            GLM   Df Residuals:                   896787
Model Family:                 Poisson   Df Model:                           11
Link Function:                    log   Scale:                             1.0
Method:                          IRLS   Log-Likelihood:                -974.87
Date:                Fri, 18 Nov 2016   Deviance:                       1717.7
Time:                        15:35:01   Pearson chi2:                 6.23e+05
No. Iterations:                    30                                         
===========================================================================================================
                                              coef    std err          z      P>|z|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------------------------------
Intercept                                 -59.6181     32.660     -1.825      0.068      -123.631     4.395
trajectory_direction[T.Outbound]           -0.2033   7.36e-06  -2.76e+04      0.000        -0.203    -0.203
bs(linear_distance, df=10, degree=3)[0]    81.3277     32.660      2.490      0.013        17.315   145.340
bs(linear_distance, df=10, degree=3)[1]   -90.5725     32.660     -2.773      0.006      -154.585   -26.560
bs(linear_distance, df=10, degree=3)[2]    43.8149     32.660      1.342      0.180       -20.198   107.828
bs(linear_distance, df=10, degree=3)[3]    48.6011     32.660      1.488      0.137       -15.412   112.614
bs(linear_distance, df=10, degree=3)[4]    49.8011     32.671      1.524      0.127       -14.233   113.835
bs(linear_distance, df=10, degree=3)[5]    47.4810     32.669      1.453      0.146       -16.548   111.510
bs(linear_distance, df=10, degree=3)[6]    55.6239     32.665      1.703      0.089        -8.398   119.645
bs(linear_distance, df=10, degree=3)[7]    49.3887     32.675      1.512      0.131       -14.653   113.431
bs(linear_distance, df=10, degree=3)[8]    49.0174     32.724      1.498      0.134       -15.121   113.156
bs(linear_distance, df=10, degree=3)[9]    45.9343     33.047      1.390      0.165       -18.837   110.706
===========================================================================================================

In [197]:
formula = '1 + bs(linear_distance, df=10, degree=3) * trajectory_direction'
design_matrix = patsy.dmatrix(
    formula, train_position_info, return_type='dataframe')
fitted_model = sm.GLM(train_spikes_data[bad_ind], design_matrix, family=sm.families.Poisson()).fit(maxiter=30, drop='missing')


/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/families/links.py:422: RuntimeWarning: overflow encountered in exp
  return np.exp(z)
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/families/family.py:347: RuntimeWarning: divide by zero encountered in true_divide
  endog_mu = self._clean(endog/mu)
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/families/family.py:347: RuntimeWarning: invalid value encountered in true_divide
  endog_mu = self._clean(endog/mu)
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/families/family.py:121: RuntimeWarning: invalid value encountered in multiply
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/families/family.py:121: RuntimeWarning: divide by zero encountered in true_divide
  return 1. / (self.link.deriv(mu)**2 * self.variance(mu))
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/generalized_linear_model.py:711: RuntimeWarning: invalid value encountered in multiply
  - offset_exposure)
/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/regression/linear_model.py:612: RuntimeWarning: invalid value encountered in multiply
  return np.sqrt(self.weights)[:, None]*X
---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-197-dc42b2dcd3eb> in <module>()
      2 design_matrix = patsy.dmatrix(
      3     formula, train_position_info, return_type='dataframe')
----> 4 fitted_model = sm.GLM(train_spikes_data[bad_ind], design_matrix, family=sm.families.Poisson()).fit(maxiter=30, drop='missing')

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/genmod/generalized_linear_model.py in fit(self, start_params, maxiter, method, tol, scale, cov_type, cov_kwds, use_t, **kwargs)
    710             wlsendog = (lin_pred + self.family.link.deriv(mu) * (self.endog-mu)
    711                         - offset_exposure)
--> 712             wls_results = lm.WLS(wlsendog, wlsexog, self.weights).fit()
    713             lin_pred = np.dot(self.exog, wls_results.params) + offset_exposure
    714             mu = self.family.fitted(lin_pred)

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/regression/linear_model.py in fit(self, method, cov_type, cov_kwds, use_t, **kwargs)
    172                 (not hasattr(self, 'rank'))):
    173 
--> 174                 self.pinv_wexog, singular_values = pinv_extended(self.wexog)
    175                 self.normalized_cov_params = np.dot(self.pinv_wexog,
    176                                         np.transpose(self.pinv_wexog))

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/statsmodels/tools/tools.py in pinv_extended(X, rcond)
    390     X = np.asarray(X)
    391     X = X.conjugate()
--> 392     u, s, vt = np.linalg.svd(X, 0)
    393     s_orig = np.copy(s)
    394     m = u.shape[0]

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/numpy/linalg/linalg.py in svd(a, full_matrices, compute_uv)
   1357 
   1358         signature = 'D->DdD' if isComplexType(t) else 'd->ddd'
-> 1359         u, s, vt = gufunc(a, signature=signature, extobj=extobj)
   1360         u = u.astype(result_t, copy=False)
   1361         s = s.astype(_realType(result_t), copy=False)

/Users/edeno/anaconda3/envs/Jadhav-2016-Data-Analysis/lib/python3.5/site-packages/numpy/linalg/linalg.py in _raise_linalgerror_svd_nonconvergence(err, flag)
     97 
     98 def _raise_linalgerror_svd_nonconvergence(err, flag):
---> 99     raise LinAlgError("SVD did not converge")
    100 
    101 def get_linalg_error_extobj(callback):

LinAlgError: SVD did not converge

In [215]:
formula = '1 + bs(linear_distance, df=10, degree=3) * trajectory_category_ind'
design_matrix = patsy.dmatrix(
    formula, train_position_info, return_type='dataframe')
fitted_model = sm.GLM(train_spikes_data[bad_ind], design_matrix, family=sm.families.Poisson()).fit(maxiter=30, drop='missing')

print(fitted_model.summary())

fig, ax = plt.subplots(1, 4)

for ind in np.arange(4):
    predictors = {'linear_distance': linear_distance_grid_centers, 
                  'trajectory_category_ind': [ind] *
                      len(linear_distance_grid_centers)}
    predict_design_matrix = patsy.build_design_matrices([design_matrix.design_info], predictors)[0]
    ax[ind].plot(linear_distance_grid_centers, fitted_model.predict(predict_design_matrix) * sampling_frequency)
    ax[ind].set_xlim((linear_distance_grid.min(), linear_distance_grid.max()));
    ax[ind].set_title(ind)

plt.tight_layout()


                 Generalized Linear Model Regression Results                  
==============================================================================
Dep. Variable:               is_spike   No. Observations:               896799
Model:                            GLM   Df Residuals:                   896777
Model Family:                 Poisson   Df Model:                           21
Link Function:                    log   Scale:                             1.0
Method:                          IRLS   Log-Likelihood:                -907.89
Date:                Fri, 18 Nov 2016   Deviance:                       1583.8
Time:                        15:49:38   Pearson chi2:                 3.94e+05
No. Iterations:                    30                                         
===================================================================================================================================
                                                                      coef    std err          z      P>|z|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------------------------------------------------------
Intercept                                                         -43.4638    768.050     -0.057      0.955     -1548.815  1461.887
bs(linear_distance, df=10, degree=3)[0]                             8.9599    773.503      0.012      0.991     -1507.077  1524.997
bs(linear_distance, df=10, degree=3)[1]                            74.3986    769.271      0.097      0.923     -1433.344  1582.142
bs(linear_distance, df=10, degree=3)[2]                            -2.4648    770.300     -0.003      0.997     -1512.225  1507.295
bs(linear_distance, df=10, degree=3)[3]                            43.6134    768.103      0.057      0.955     -1461.841  1549.068
bs(linear_distance, df=10, degree=3)[4]                            13.2208    768.185      0.017      0.986     -1492.394  1518.836
bs(linear_distance, df=10, degree=3)[5]                            32.3657    768.057      0.042      0.966     -1472.999  1537.730
bs(linear_distance, df=10, degree=3)[6]                            32.6647    768.061      0.043      0.966     -1472.707  1538.037
bs(linear_distance, df=10, degree=3)[7]                            36.3329    768.090      0.047      0.962     -1469.096  1541.761
bs(linear_distance, df=10, degree=3)[8]                            -5.9135    768.885     -0.008      0.994     -1512.899  1501.073
bs(linear_distance, df=10, degree=3)[9]                           -42.1140    768.847     -0.055      0.956     -1549.025  1464.798
trajectory_category_ind                                            -5.4743    194.522     -0.028      0.978      -386.730   375.781
bs(linear_distance, df=10, degree=3)[0]:trajectory_category_ind    19.7534    195.867      0.101      0.920      -364.140   403.647
bs(linear_distance, df=10, degree=3)[1]:trajectory_category_ind   -40.1711    194.823     -0.206      0.837      -422.017   341.675
bs(linear_distance, df=10, degree=3)[2]:trajectory_category_ind    13.6528    195.077      0.070      0.944      -368.691   395.997
bs(linear_distance, df=10, degree=3)[3]:trajectory_category_ind     2.4072    194.535      0.012      0.990      -378.874   383.688
bs(linear_distance, df=10, degree=3)[4]:trajectory_category_ind    11.2728    194.556      0.058      0.954      -370.049   392.595
bs(linear_distance, df=10, degree=3)[5]:trajectory_category_ind     5.2733    194.524      0.027      0.978      -375.986   386.533
bs(linear_distance, df=10, degree=3)[6]:trajectory_category_ind     7.5479    194.525      0.039      0.969      -373.713   388.809
bs(linear_distance, df=10, degree=3)[7]:trajectory_category_ind     4.8172    194.532      0.025      0.980      -376.459   386.094
bs(linear_distance, df=10, degree=3)[8]:trajectory_category_ind    15.5970    194.732      0.080      0.936      -366.070   397.264
bs(linear_distance, df=10, degree=3)[9]:trajectory_category_ind    24.3362    194.736      0.125      0.901      -357.339   406.011
===================================================================================================================================

Check position dataframe


In [19]:
def scatter_wrap(x, y, df, **kwargs):
    data = kwargs.pop('data')
    ax = plt.gca()
    ax.plot(df[x], df[y], color='grey', zorder=1, linewidth=5, alpha=0.4)
    ax.scatter(data[x], data[y], **kwargs, zorder=2, s=10)

sns.set(font_scale=2)
grid = sns.FacetGrid(position_info, col='trajectory_direction',
                     hue='trajectory_category_ind', size=4, aspect=1.5)
grid.map_dataframe(scatter_wrap, 'x_position', 'y_position', position_info)
grid.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)
grid.add_legend()

grid = sns.FacetGrid(position_info.reset_index(), col='trajectory_direction',
                     hue='trajectory_category_ind', size=4, aspect=2.5)
grid.map_dataframe(scatter_wrap, 'time', 'linear_position', position_info.reset_index())
grid.set(xlim=(position_info.index.min(), position_info.index.max()))
grid.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)

grid = sns.FacetGrid(position_info.reset_index(), col='trajectory_direction',
                     hue='trajectory_category_ind', size=4, aspect=2.5)
grid.map_dataframe(scatter_wrap, 'time', 'linear_distance', position_info.reset_index())
grid.set(xlim=(position_info.index.min(), position_info.index.max()))
grid.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)


Out[19]:
<seaborn.axisgrid.FacetGrid at 0x11c4c30b8>

In [20]:
def _linear_position(df):
    is_left_arm = (df.trajectory_category_ind == 1) | (
        df.trajectory_category_ind == 2)
    return np.where(is_left_arm, -1 * df.linear_distance, df.linear_distance)


def _trial_number(df):
    return np.cumsum(df.trajectory_category_ind.diff().fillna(0) > 0) + 1


def _trajectory_turn(df):
    trajectory_turn = {0: np.nan, 1: 'Left', 2: 'Right', 3: 'Left', 4: 'Right'}
    return df.trajectory_category_ind.map(trajectory_turn)


def _trajectory_direction(df):
    trajectory_direction = {0: np.nan, 1: 'Outbound',
                            2: 'Inbound', 3: 'Outbound', 4: 'Inbound'}
    return df.trajectory_category_ind.map(trajectory_direction)

time = data_processing.get_trial_time(epoch_index, animals)
position = (pd.concat([data_processing.get_linear_position_structure(epoch_index, animals),
                       data_processing.get_position_dataframe(epoch_index, animals)], axis=1)
            .assign(trajectory_direction=_trajectory_direction)
            .assign(trajectory_turn=_trajectory_turn)
            .assign(trial_number=_trial_number)
            .assign(linear_position=_linear_position)
            )

In [21]:
def scatter_wrap(x, y, df, **kwargs):
    data = kwargs.pop('data')
    ax = plt.gca()
    ax.plot(df[x], df[y], color='grey', zorder=1, linewidth=5, alpha=0.4)
    ax.scatter(data[x], data[y], **kwargs, zorder=2, s=10)

sns.set(font_scale=2)
grid = sns.FacetGrid(position, col='trajectory_direction',
                     hue='trajectory_category_ind', size=4, aspect=1.5)
grid.map_dataframe(scatter_wrap, 'x_position', 'y_position', position)
grid.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)
grid.add_legend()

grid = sns.FacetGrid(position.reset_index(), col='trajectory_direction',
                     hue='trajectory_category_ind', size=4, aspect=2.5)
grid.map_dataframe(scatter_wrap, 'time', 'linear_position', position.reset_index())
grid.set(xlim=(position.index.min(), position.index.max()))
grid.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)

grid = sns.FacetGrid(position.reset_index(), col='trajectory_direction',
                     hue='trajectory_category_ind', size=4, aspect=2.5)
grid.map_dataframe(scatter_wrap, 'time', 'linear_distance', position.reset_index())
grid.set(xlim=(position.index.min(), position.index.max()))
grid.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)


Out[21]:
<seaborn.axisgrid.FacetGrid at 0x11e6b7c50>

In [22]:
sns.set(font_scale=2)
grid = sns.FacetGrid(train_position_info, col='trajectory_direction',
                     hue='trajectory_category_ind', size=4, aspect=1.5)
grid.map_dataframe(scatter_wrap, 'x_position', 'y_position', position)
grid.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)
grid.add_legend()

grid = sns.FacetGrid(train_position_info.reset_index(), col='trajectory_direction',
                     hue='trajectory_category_ind', size=4, aspect=2.5)
grid.map_dataframe(scatter_wrap, 'time', 'linear_position', position.reset_index())
grid.set(xlim=(position.index.min(), position.index.max()))
grid.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)

grid = sns.FacetGrid(train_position_info.reset_index(), col='trajectory_direction',
                     hue='trajectory_category_ind', size=4, aspect=2.5)
grid.map_dataframe(scatter_wrap, 'time', 'linear_distance', position.reset_index())
grid.set(xlim=(position.index.min(), position.index.max()))
grid.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)


Out[22]:
<seaborn.axisgrid.FacetGrid at 0x11eb76c88>

In [ ]: